Overview of tutorial

This tutorial simulates a population effect size of Cohen’s d = 0.5 for different sample sizes, and examines the relationship between Cohen’s d, its 95% Confidence Interval, and the significance of the t-test’s p-value.

By the end of this lesson you should understand that p-values are re-expressions of the same information conveyed by Confidence Intervals, and that statistical power is a re-expression of the width of Confidence Intervals.

Citation & License

Citation:

Ian Hussey (2024) Improving your statistical inferences through simulation studies in R. https://github.com/ianhussey/simulation-course

License:

CC BY 4.0

Dependencies

library(tidyr)
library(dplyr)
library(purrr) 
library(stringr)
library(forcats)
library(ggplot2)
library(scales)
library(patchwork)
library(knitr)
library(kableExtra)
library(janitor)
library(effsize)

Simulation

# functions for simulation
generate_data <- function(n_per_condition,
                          mean_control,
                          mean_intervention,
                          sd) {
  
  data_control <- 
    tibble(condition = "control",
           score = rnorm(n = n_per_condition, mean = mean_control, sd = sd))
  
  data_intervention <- 
    tibble(condition = "intervention",
           score = rnorm(n = n_per_condition, mean = mean_intervention, sd = sd))
  
  data_combined <- bind_rows(data_control,
                             data_intervention) |>
    mutate(condition = fct_relevel(condition, "intervention", "control"))
  
  return(data_combined)
}

analyze <- function(data) {

  res_t_test <- t.test(formula = score ~ condition, 
                       data = data,
                       var.equal = TRUE,
                       alternative = "two.sided")
  
  res_cohens_d <- effsize::cohen.d(formula = score ~ condition,
                                   data = data,
                                   pooled = TRUE)
  
  res <- tibble(p = res_t_test$p.value, 
                cohens_d = res_cohens_d$estimate,
                cohens_d_ci_lower = res_cohens_d$conf.int[1],
                cohens_d_ci_upper = res_cohens_d$conf.int[2])

  return(res)
}


# set seed
set.seed(42)

# simulation parameters
experiment_parameters <- expand_grid(
  n_per_condition = seq(from = 10, to = 90, by = 10),
  mean_control = 0,
  mean_intervention = 0.5,
  sd = 1,
  iteration = 1:1000
) 

# run simulation
simulation <- experiment_parameters |>
  mutate(generated_data = pmap(list(n_per_condition, 
                                    mean_control,
                                    mean_intervention,
                                    sd),
                               generate_data)) |>
  mutate(results = pmap(list(generated_data),
                        analyze))

Cohen’s d by sample size

# wrangle
simulation |>
  unnest(results) |>
  # plot
  ggplot(aes(n_per_condition*2, cohens_d)) +
  geom_jitter(alpha = 0.25) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total N") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  ggtitle("Population Cohen's d = 0.5")

Ordered by statistical significance

Let’s color the Cohen’s ds by their statistical significance.

# wrangle
simulation |>
  unnest(results) |>
  mutate(significant = p < .05) |>
  # plot
  ggplot(aes(n_per_condition*2, cohens_d, color = significant)) +
  geom_jitter(alpha = 0.25) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total N") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  ggtitle("Population Cohen's d = 0.5")

Cohen’s d and its 95% CIs

Let’s add the 95% Confidence Intervals.

Randomly select one dataset per sample size

# wrangle
simulation |>
  unnest(results) |>
  mutate(significant = p < .05) |>
  group_by(n_per_condition) |>
  slice_sample(n = 1) |>
  ungroup() |>
  # plot
  ggplot(aes(n_per_condition*2, cohens_d, color = significant)) +
  geom_point() +
  geom_linerange(aes(ymin = cohens_d_ci_lower, ymax = cohens_d_ci_upper)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total N") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  ggtitle("Population Cohen's d = 0.5\nOne randomly selected dataset per sample size")

All datasets

# wrangle
simulation |>
  unnest(results) |>
  mutate(significant = p < .05,
         total_n = paste("N =", n_per_condition*2),
         total_n = fct_reorder(total_n, as.numeric(str_extract(total_n, "\\d+")))) |>
  # plot
  ggplot(aes(iteration, cohens_d, color = significant)) +
  geom_point(alpha = 0.5) +
  geom_linerange(aes(ymin = cohens_d_ci_lower, ymax = cohens_d_ci_upper), alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Iteration") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  facet_wrap(~ total_n, ncol = 3, scales = "free_y")

Ordered by Cohen’s d

The above plot is hard to understand. Let’s order the Cohen’s ds from smallest to largest.

# wrangle
simulation |>
  unnest(results) |>
  mutate(significant = p < .05,
         total_n = paste("N =", n_per_condition*2),
         total_n = fct_reorder(total_n, as.numeric(str_extract(total_n, "\\d+")))) |>
  arrange(n_per_condition, cohens_d) |>
  group_by(n_per_condition) |>
  mutate(rank = row_number()) |>
  ungroup() |>
  # plot
  ggplot(aes(rank, cohens_d, color = significant)) +
  geom_point() +
  geom_linerange(aes(ymin = cohens_d_ci_lower, ymax = cohens_d_ci_upper), alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Ranked iteration") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  facet_wrap(~ total_n, ncol = 3)

  • Notice the relationship between 95% CI and p value significance.
  • A certain percentage of Cohen’s ds are green vs. blue. What is this percentage also called? What statistical property?

Mean Cohen’s d by sample size

Now let’s average over the Cohen’s ds in each condition to find the mean Cohen’s d and its mean 95% CIs.

# wrangle
simulation_summary <- simulation |>
  # unnest results
  unnest(results) |>
  group_by(n_per_condition) |>
  summarize(proportion_significant = mean(p < .05), 
            mean_cohens_d = mean(cohens_d),
            mean_cohens_d_ci_lower = mean(cohens_d_ci_lower),
            mean_cohens_d_ci_upper = mean(cohens_d_ci_upper)) |>
  mutate(centered_mean_cohens_d_ci_lower = mean_cohens_d_ci_lower - mean_cohens_d,
         centered_mean_cohens_d_ci_upper = mean_cohens_d_ci_upper - mean_cohens_d)

# plot results
p1 <- ggplot(simulation_summary, aes(n_per_condition*2, mean_cohens_d)) +
  geom_point() +
  geom_linerange(aes(ymin = mean_cohens_d_ci_lower, ymax = mean_cohens_d_ci_upper)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total sample size") +
  scale_y_continuous(breaks = breaks_pretty(n = 5),
                     #limits = c(0,1),
                     name = "Mean Cohen's d\n(and mean 95% CIs)") +
  theme_linedraw() +
  ggtitle("Population Cohen's d = 0.5")

p1

Mean Cohen’s d and power by sample size

p2 <- ggplot(simulation_summary, aes(n_per_condition*2, proportion_significant)) +
  geom_point() +
  geom_hline(yintercept = 0.05, linetype = "dotted") +
  geom_hline(yintercept = 0.80, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total sample size") +
  scale_y_continuous(breaks = breaks_pretty(n = 5),
                     limits = c(0,1),
                     name = "Proportion of significant\np-values") +
  theme_linedraw() 

p1 + p2 + plot_layout(ncol = 1)

Session info

sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 15.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Zurich
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] effsize_0.8.1        janitor_2.2.1        kableExtra_1.4.0    
##  [4] knitr_1.49           patchwork_1.2.0.9000 scales_1.3.0        
##  [7] ggplot2_3.5.1        forcats_1.0.0        stringr_1.5.1       
## [10] purrr_1.0.4          dplyr_1.1.4          tidyr_1.3.1         
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6      jsonlite_1.8.9    compiler_4.3.3    tidyselect_1.2.1 
##  [5] xml2_1.3.6        snakecase_0.11.1  jquerylib_0.1.4   systemfonts_1.0.6
##  [9] yaml_2.3.10       fastmap_1.2.0     R6_2.6.1          generics_0.1.3   
## [13] tibble_3.2.1      munsell_0.5.1     lubridate_1.9.4   svglite_2.1.3    
## [17] bslib_0.8.0       pillar_1.10.1     rlang_1.1.5       cachem_1.1.0     
## [21] stringi_1.8.4     xfun_0.49         sass_0.4.9        timechange_0.3.0 
## [25] viridisLite_0.4.2 cli_3.6.4         withr_3.0.2       magrittr_2.0.3   
## [29] digest_0.6.37     grid_4.3.3        rstudioapi_0.17.1 lifecycle_1.0.4  
## [33] vctrs_0.6.5       evaluate_1.0.1    glue_1.8.0        farver_2.1.2     
## [37] colorspace_2.1-1  rmarkdown_2.29    tools_4.3.3       pkgconfig_2.0.3  
## [41] htmltools_0.5.8.1
---
title: "Understanding the link between p-values, Confidence Intervals, and power"
author: "Ian Hussey"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
  html_document:
    code_download: true
    code_folding: show
    highlight: haddock
    theme: flatly
    toc: yes
    toc_float: yes
---

# Overview of tutorial

This tutorial simulates a population effect size of Cohen's d = 0.5 for different sample sizes, and examines the relationship between Cohen's d, its 95% Confidence Interval, and the significance of the t-test's *p*-value. 

By the end of this lesson you should understand that *p*-values are re-expressions of the same information conveyed by Confidence Intervals, and that statistical power is a re-expression of the width of Confidence Intervals.

# Citation & License

Citation: 

Ian Hussey (2024) Improving your statistical inferences through simulation studies in R. https://github.com/ianhussey/simulation-course

License: 

[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/deed.en)

```{r, include=FALSE}

# set default chunk options
knitr::opts_chunk$set(message = FALSE,
                      warning = FALSE)

# disable scientific notation
options(scipen = 999) 

```

# Dependencies

```{r}

library(tidyr)
library(dplyr)
library(purrr) 
library(stringr)
library(forcats)
library(ggplot2)
library(scales)
library(patchwork)
library(knitr)
library(kableExtra)
library(janitor)
library(effsize)

```

# Simulation

```{r}

# functions for simulation
generate_data <- function(n_per_condition,
                          mean_control,
                          mean_intervention,
                          sd) {
  
  data_control <- 
    tibble(condition = "control",
           score = rnorm(n = n_per_condition, mean = mean_control, sd = sd))
  
  data_intervention <- 
    tibble(condition = "intervention",
           score = rnorm(n = n_per_condition, mean = mean_intervention, sd = sd))
  
  data_combined <- bind_rows(data_control,
                             data_intervention) |>
    mutate(condition = fct_relevel(condition, "intervention", "control"))
  
  return(data_combined)
}

analyze <- function(data) {

  res_t_test <- t.test(formula = score ~ condition, 
                       data = data,
                       var.equal = TRUE,
                       alternative = "two.sided")
  
  res_cohens_d <- effsize::cohen.d(formula = score ~ condition,
                                   data = data,
                                   pooled = TRUE)
  
  res <- tibble(p = res_t_test$p.value, 
                cohens_d = res_cohens_d$estimate,
                cohens_d_ci_lower = res_cohens_d$conf.int[1],
                cohens_d_ci_upper = res_cohens_d$conf.int[2])

  return(res)
}


# set seed
set.seed(42)

# simulation parameters
experiment_parameters <- expand_grid(
  n_per_condition = seq(from = 10, to = 90, by = 10),
  mean_control = 0,
  mean_intervention = 0.5,
  sd = 1,
  iteration = 1:1000
) 

# run simulation
simulation <- experiment_parameters |>
  mutate(generated_data = pmap(list(n_per_condition, 
                                    mean_control,
                                    mean_intervention,
                                    sd),
                               generate_data)) |>
  mutate(results = pmap(list(generated_data),
                        analyze))

```

# Cohen's d by sample size

```{r fig.height=5, fig.width=6}

# wrangle
simulation |>
  unnest(results) |>
  # plot
  ggplot(aes(n_per_condition*2, cohens_d)) +
  geom_jitter(alpha = 0.25) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total N") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  ggtitle("Population Cohen's d = 0.5")

```

## Ordered by statistical significance

Let's color the Cohen's ds by their statistical significance. 

```{r fig.height=5, fig.width=6}

# wrangle
simulation |>
  unnest(results) |>
  mutate(significant = p < .05) |>
  # plot
  ggplot(aes(n_per_condition*2, cohens_d, color = significant)) +
  geom_jitter(alpha = 0.25) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total N") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  ggtitle("Population Cohen's d = 0.5")

```

# Cohen's d and its 95% CIs 

Let's add the 95% Confidence Intervals.

## Randomly select one dataset per sample size

```{r}

# wrangle
simulation |>
  unnest(results) |>
  mutate(significant = p < .05) |>
  group_by(n_per_condition) |>
  slice_sample(n = 1) |>
  ungroup() |>
  # plot
  ggplot(aes(n_per_condition*2, cohens_d, color = significant)) +
  geom_point() +
  geom_linerange(aes(ymin = cohens_d_ci_lower, ymax = cohens_d_ci_upper)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total N") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  ggtitle("Population Cohen's d = 0.5\nOne randomly selected dataset per sample size")

```

## All datasets

```{r fig.height=10, fig.width=12}

# wrangle
simulation |>
  unnest(results) |>
  mutate(significant = p < .05,
         total_n = paste("N =", n_per_condition*2),
         total_n = fct_reorder(total_n, as.numeric(str_extract(total_n, "\\d+")))) |>
  # plot
  ggplot(aes(iteration, cohens_d, color = significant)) +
  geom_point(alpha = 0.5) +
  geom_linerange(aes(ymin = cohens_d_ci_lower, ymax = cohens_d_ci_upper), alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Iteration") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  facet_wrap(~ total_n, ncol = 3, scales = "free_y")

```

## Ordered by Cohen's d

The above plot is hard to understand. Let's order the Cohen's ds from smallest to largest. 

```{r fig.height=10, fig.width=12}

# wrangle
simulation |>
  unnest(results) |>
  mutate(significant = p < .05,
         total_n = paste("N =", n_per_condition*2),
         total_n = fct_reorder(total_n, as.numeric(str_extract(total_n, "\\d+")))) |>
  arrange(n_per_condition, cohens_d) |>
  group_by(n_per_condition) |>
  mutate(rank = row_number()) |>
  ungroup() |>
  # plot
  ggplot(aes(rank, cohens_d, color = significant)) +
  geom_point() +
  geom_linerange(aes(ymin = cohens_d_ci_lower, ymax = cohens_d_ci_upper), alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Ranked iteration") +
  scale_y_continuous(breaks = breaks_pretty(n = 10),
                     #limits = c(0,1),
                     name = "Cohen's d") +
  theme_linedraw() +
  scale_color_viridis_d(begin = 0.3, end = 0.7) +
  facet_wrap(~ total_n, ncol = 3)

```

- Notice the relationship between 95% CI and p value significance.
- A certain percentage of Cohen's ds are green vs. blue. What is this percentage also called? What statistical property?

# Mean Cohen's d by sample size

Now let's average over the Cohen's ds in each condition to find the mean Cohen's d and its mean 95% CIs.

```{r fig.height=2.5, fig.width=6}

# wrangle
simulation_summary <- simulation |>
  # unnest results
  unnest(results) |>
  group_by(n_per_condition) |>
  summarize(proportion_significant = mean(p < .05), 
            mean_cohens_d = mean(cohens_d),
            mean_cohens_d_ci_lower = mean(cohens_d_ci_lower),
            mean_cohens_d_ci_upper = mean(cohens_d_ci_upper)) |>
  mutate(centered_mean_cohens_d_ci_lower = mean_cohens_d_ci_lower - mean_cohens_d,
         centered_mean_cohens_d_ci_upper = mean_cohens_d_ci_upper - mean_cohens_d)

# plot results
p1 <- ggplot(simulation_summary, aes(n_per_condition*2, mean_cohens_d)) +
  geom_point() +
  geom_linerange(aes(ymin = mean_cohens_d_ci_lower, ymax = mean_cohens_d_ci_upper)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total sample size") +
  scale_y_continuous(breaks = breaks_pretty(n = 5),
                     #limits = c(0,1),
                     name = "Mean Cohen's d\n(and mean 95% CIs)") +
  theme_linedraw() +
  ggtitle("Population Cohen's d = 0.5")

p1

```

# Mean Cohen's d and power by sample size

```{r fig.height=5, fig.width=6}

p2 <- ggplot(simulation_summary, aes(n_per_condition*2, proportion_significant)) +
  geom_point() +
  geom_hline(yintercept = 0.05, linetype = "dotted") +
  geom_hline(yintercept = 0.80, linetype = "dotted") +
  scale_x_continuous(breaks = breaks_pretty(n = 10),
                     name = "Total sample size") +
  scale_y_continuous(breaks = breaks_pretty(n = 5),
                     limits = c(0,1),
                     name = "Proportion of significant\np-values") +
  theme_linedraw() 

p1 + p2 + plot_layout(ncol = 1)

```

# Session info

```{r}

sessionInfo()

```


